On this page, we will be exploring the overall trends for our key outcome- sleeping hours per day, and how these outcomes correlate with the predictors, including demographic variables like age, race, sex, education level, and poverty status.

library(tidyverse)
library(patchwork)
library(knitr)
library(dplyr)
library(gganimate)
library(gifski)
library(png)
library(plotly)
library(ggridges)

Education level

We first want to get the distribution of sleeping hours less than 7 hours across the five different education levels. We will construct a bar chart tabulating the average sleeping hours per week in each of the five education levels. Gender consideration was also added into to the bar chart in order to see a difference between female and male in each category.

 edu_plot=slp_df %>%
  filter(weekday_slp_hr<7)%>%
  group_by(education_level,gender) %>%
  summarize(ave_sleep=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7)) %>% 
  ungroup() %>%
  mutate(education_level=fct_reorder(education_level,ave_sleep)) %>%
  ggplot(aes(x=education_level,y=ave_sleep,fill=gender))+ geom_bar(width=0.5,stat="identity")+
  viridis::scale_fill_viridis(
    name = "gender",
    discrete = TRUE
  ) + geom_text(aes(label = round(ave_sleep, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)+
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+labs(
    title = "Distribution of sleeping hours across education level",
    x = "Education Level",
    y = "Average Sleeping hours per day"
    )
edu_plot

From the above plot, high school graduates has the least sleeping hours, while college graduates or above has the highest. Female and male does not have a significant difference in both groups.

Race

A heat map was made to visualize sleeping hours less than 7 hours among different races. The below plot indicates that there is a large number of people who has a sleeping hour of 6 hours per day among the Non-Hispanic black group. There are also many Non-Hispanic Whites who has an average of 6 hours per day.

race_plot=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
  group_by(race,sleep_ave) %>%
  summarise(obs=n()) %>%
  plot_ly(
    x = ~sleep_ave, y = ~race, z = ~obs, type = "heatmap", colors = "BuPu"
  ) %>%
  colorbar(title = "Number of People", x = 1, y = 0.5) 
layout(race_plot, xaxis = list(title = "Average Sleeping Hours Per Day"), yaxis = list(title = "Race"))

Race Gender Gap by Education Level

The below plot demonstrates the gender gap in the patients for different races. Male outnumber female for sleeping less than 7 hours per day for all race, except Non-Hispanic Black and Non-Hispanic Asian. The bubble represents the degree of the gap, along inlcuding their education level.

gender_plot=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  group_by(race,education_level) %>%
  summarize(total_f=sum(gender=="female"),
            total_m=sum(gender=="male"),
            gap=total_m-total_f) %>%
  mutate(text_lable=str_c("Race=",race,"\nEducation level: ", education_level)) %>%
  plot_ly(x=~total_m,y=~total_f,text=~text_lable,color=~race,size=~gap,type="scatter",mode="markers",
          colors="viridis",sizes = c(50, 700), marker = list(opacity = 0.7))

layout(gender_plot, title = "Race Gender Gap by Education Level", xaxis = list(title = "Number of Male Sleeping less than 7 hrs"), yaxis = list(title = "Number of Female Sleeping less than 7 hrs"))

Poverty Status

income_df=slp_df %>%
  filter(weekday_slp_hr<7) %>%
  mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
                           income_poverty_ratio < 1~ "in poverty",
                           income_poverty_ratio == 1~ "in poverty")) %>%
  ggplot(aes(x=weekday_slp_hr,y=ip_stat,fill=ip_stat))+
  geom_density_ridges(
    aes(point_color = ip_stat, point_shape = ip_stat,point_fill=ip_stat),
    alpha = .3, point_alpha = 0.7)+
   scale_x_continuous(
    breaks = c(2, 4, 6), 
    labels = c("2hrs", "4hrs", "6hrs"),
    limits = c(2, 6)
    )
  
box_plot=
  slp_df %>%
  filter(weekday_slp_hr<6) %>%
  mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
                           income_poverty_ratio < 1~ "in poverty",
                           income_poverty_ratio == 1~ "in poverty")) %>%
  mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
  ggplot(aes(x=ip_stat,y=sleep_ave))+geom_boxplot(aes(fill = ip_stat), alpha = 0.3)+
  geom_hline(aes(yintercept=median(sleep_ave),
            color="red", linetype="dashed"))+
  geom_text(aes(0, median(weekday_slp_hr), label = "sleep hours median"), vjust = -0.5, hjust = 0, color = "red")

comb=income_df+box_plot
comb+plot_annotation(
  title = "Sleeping Hours By Poverty Status"
) 

age_group= slp_df%>%
  filter(weekday_slp_hr<7) %>%
  mutate(age_gp=case_when(age>=20 & age<=30 ~ "20-30",
                          age>=31 &age <=40 ~ "31-40",
                          age>=41 &age<=50 ~ "41-50",
                          age>=51 &age<=60 ~ "51-60",
                          age>=61 &age<=70 ~ "61-70",
                          age>=71 & age <=80 ~ "71-80")) %>%
  group_by(age_gp) %>%
  summarise(ave_slp=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7))%>%
  ungroup() %>%
  mutate(age_gp=fct_reorder(age_gp,ave_slp)) %>%
  ggplot(aes(x=age_gp,y=ave_slp,fill=age_gp))+ geom_bar(stat="identity")+ scale_fill_viridis_d()+
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+
  geom_text(aes(label = round(ave_slp, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)
age_group